Code
suppressPackageStartupMessages (library (readxl))
suppressPackageStartupMessages (library (tidyverse))
suppressPackageStartupMessages (library (plotly))
suppressPackageStartupMessages (library (fmsb))
data = read_excel ("Project_1_Data.xlsx" , sheet = "pooled123" )
filteredData = select (data, PID, BSSQ_1: BSSQ_15, ASSQ_1: ASSQ_15, age, VRexperience, ssq_full)
#calculating differences between baseline and active SSQ for each symptom
filteredData = mutate (filteredData, d_discomfort = ASSQ_1 - BSSQ_1)
filteredData = mutate (filteredData, d_fatigue = ASSQ_2 - BSSQ_2)
filteredData = mutate (filteredData, d_headache = ASSQ_3 - BSSQ_3)
filteredData = mutate (filteredData, d_eyestrain = ASSQ_4 - BSSQ_4)
filteredData = mutate (filteredData, d_difficulty_focusing = ASSQ_5 - BSSQ_5)
filteredData = mutate (filteredData, d_salivation = ASSQ_6 - BSSQ_6)
filteredData = mutate (filteredData, d_sweating = ASSQ_7 - BSSQ_7)
filteredData = mutate (filteredData, d_nausea = ASSQ_8 - BSSQ_8)
filteredData = mutate (filteredData, d_difficulty_concentrating = ASSQ_9 - BSSQ_9)
filteredData = mutate (filteredData, d_fullness_of_head = ASSQ_10 - BSSQ_10)
filteredData = mutate (filteredData, d_blurred_vision = ASSQ_11 - BSSQ_11)
filteredData = mutate (filteredData, d_dizziness_o = ASSQ_12 - BSSQ_12)
filteredData = mutate (filteredData, d_dizziness_c = ASSQ_13 - BSSQ_13)
filteredData = mutate (filteredData, d_vertigo = ASSQ_14 - BSSQ_14)
filteredData = mutate (filteredData, d_stomach_awareness = ASSQ_15 - BSSQ_15)
#reclasss VR experience as factor (was chr)
filteredData$ VRexperience = as.factor (filteredData$ VRexperience)
#we want to filter this data even further and split it into age groups
#once in age groups, calculate the mean change for each of the age groups for each symptom
filteredData = mutate (filteredData, age_group = case_when (
age >= 16 & age <= 21 ~ "16 to 21" ,
age >= 22 & age <= 29 ~ "22 to 29" ,
age >= 30 & age <= 37 ~ "30 to 37" ,
age >= 38 & age <= 45 ~ "38 to 45" ,
age > 45 ~ "above 45"
))
#convert the age groups into factors
filteredData$ age_group = as.factor (filteredData$ age_group)
#renaming columns to be more informative
names (filteredData)[names (filteredData) == 'BSSQ_1' ] <- 'BSSQ_discomfort'
names (filteredData)[names (filteredData) == 'BSSQ_2' ] <- 'BSSQ_fatigue'
names (filteredData)[names (filteredData) == 'BSSQ_3' ] <- 'BSSQ_headache'
names (filteredData)[names (filteredData) == 'BSSQ_4' ] <- 'BSSQ_eyestrain'
names (filteredData)[names (filteredData) == 'BSSQ_5' ] <- 'BSSQ_difficulty_focusing'
names (filteredData)[names (filteredData) == 'BSSQ_6' ] <- 'BSSQ_salivation'
names (filteredData)[names (filteredData) == 'BSSQ_7' ] <- 'BSSQ_sweating'
names (filteredData)[names (filteredData) == 'BSSQ_8' ] <- 'BSSQ_nausea'
names (filteredData)[names (filteredData) == 'BSSQ_9' ] <- 'BSSQ_difficulty_concentrating'
names (filteredData)[names (filteredData) == 'BSSQ_10' ] <- 'BSSQ_fullness_of_head'
names (filteredData)[names (filteredData) == 'BSSQ_11' ] <- 'BSSQ_blurred_vision'
names (filteredData)[names (filteredData) == 'BSSQ_12' ] <- 'BSSQ_dizziness_o'
names (filteredData)[names (filteredData) == 'BSSQ_13' ] <- 'BSSQ_dizziness_c'
names (filteredData)[names (filteredData) == 'BSSQ_14' ] <- 'BSSQ_vertigo'
names (filteredData)[names (filteredData) == 'BSSQ_15' ] <- 'BSSQ_stomach_awareness'
names (filteredData)[names (filteredData) == 'ASSQ_1' ] <- 'ASSQ_discomfort'
names (filteredData)[names (filteredData) == 'ASSQ_2' ] <- 'ASSQ_fatigue'
names (filteredData)[names (filteredData) == 'ASSQ_3' ] <- 'ASSQ_headache'
names (filteredData)[names (filteredData) == 'ASSQ_4' ] <- 'ASSQ_eyestrain'
names (filteredData)[names (filteredData) == 'ASSQ_5' ] <- 'ASSQ_difficulty_focusing'
names (filteredData)[names (filteredData) == 'ASSQ_6' ] <- 'ASSQ_salivation'
names (filteredData)[names (filteredData) == 'ASSQ_7' ] <- 'ASSQ_sweating'
names (filteredData)[names (filteredData) == 'ASSQ_8' ] <- 'ASSQ_nausea'
names (filteredData)[names (filteredData) == 'ASSQ_9' ] <- 'ASSQ_difficulty_concentrating'
names (filteredData)[names (filteredData) == 'ASSQ_10' ] <- 'ASSQ_fullness_of_head'
names (filteredData)[names (filteredData) == 'ASSQ_11' ] <- 'ASSQ_blurred_vision'
names (filteredData)[names (filteredData) == 'ASSQ_12' ] <- 'ASSQ_dizziness_o'
names (filteredData)[names (filteredData) == 'ASSQ_13' ] <- 'ASSQ_dizziness_c'
names (filteredData)[names (filteredData) == 'ASSQ_14' ] <- 'ASSQ_vertigo'
names (filteredData)[names (filteredData) == 'ASSQ_15' ] <- 'ASSQ_stomach_awareness'
#isolating the data dictionary
data_dict = read_excel ("Project_1_Data.xlsx" , sheet = "data_dictionary" )
#filter into groups with experience or not
withVRexperience = filter (filteredData, VRexperience == 'Yes' )
noVRexperience = filter (filteredData, VRexperience == 'No' )
#taking averages for with/without experience
yes_avg_d_discomfort = mean (withVRexperience$ d_discomfort)
yes_avg_d_fatigue = mean (withVRexperience$ d_fatigue)
yes_avg_d_headache = mean (withVRexperience$ d_headache)
yes_avg_d_eyestrain = mean (withVRexperience$ d_eyestrain)
yes_avg_d_difficulty_focusing = mean (withVRexperience$ d_difficulty_focusing)
yes_avg_d_salivation = mean (withVRexperience$ d_salivation)
yes_avg_d_sweating = mean (withVRexperience$ d_sweating)
yes_avg_d_nausea = mean (withVRexperience$ d_nausea)
yes_avg_d_difficulty_concentrating = mean (withVRexperience$ d_difficulty_concentrating)
yes_avg_d_fullness = mean (withVRexperience$ d_fullness_of_head)
yes_avg_d_vision = mean (withVRexperience$ d_blurred_vision)
yes_avg_d_dizziness_o = mean (withVRexperience$ d_dizziness_o)
yes_avg_d_dizziness_c = mean (withVRexperience$ d_dizziness_c)
yes_avg_d_vertigo = mean (withVRexperience$ d_vertigo)
yes_avg_d_stomach = mean (withVRexperience$ d_stomach_awareness)
no_avg_d_discomfort = mean (noVRexperience$ d_discomfort)
no_avg_d_fatigue = mean (noVRexperience$ d_fatigue)
no_avg_d_headache = mean (noVRexperience$ d_headache)
no_avg_d_eyestrain = mean (noVRexperience$ d_eyestrain)
no_avg_d_difficulty_focusing = mean (noVRexperience$ d_difficulty_focusing)
no_avg_d_salivation = mean (noVRexperience$ d_salivation)
no_avg_d_sweating = mean (noVRexperience$ d_sweating)
no_avg_d_nausea = mean (noVRexperience$ d_nausea)
no_avg_d_difficulty_concentrating = mean (noVRexperience$ d_difficulty_concentrating)
no_avg_d_fullness = mean (noVRexperience$ d_fullness_of_head)
no_avg_d_vision = mean (noVRexperience$ d_blurred_vision)
no_avg_d_dizziness_o = mean (noVRexperience$ d_dizziness_o)
no_avg_d_dizziness_c = mean (noVRexperience$ d_dizziness_c)
no_avg_d_vertigo = mean (noVRexperience$ d_vertigo)
no_avg_d_stomach = mean (noVRexperience$ d_stomach_awareness)
#taking averages for different age groups
grp1 = filter (filteredData, age_group == '16 to 21' )
grp2 = filter (filteredData, age_group == '22 to 29' )
grp3 = filter (filteredData, age_group == '30 to 37' )
grp4 = filter (filteredData, age_group == '38 to 45' )
grp5 = filter (filteredData, age_group == 'above 45' )
mean_sqq_ages = c (mean (grp1$ ssq_full),
mean (grp2$ ssq_full),
mean (grp3$ ssq_full),
mean (grp4$ ssq_full),
mean (grp5$ ssq_full)
)
data_by_age_group = data.frame (
age_group = c ("16 to 21" , '22 to 29' ,'30 to 37' ,'38 to 45' ,'Above 45' ),
mean_ssq = mean_sqq_ages
)
Summary of Findings
(executive summary goes here)
Initial Data Analysis (IDA)
Source
Our data was sourced from Cosette Saunder’s PhD and honors thesis paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice ”. (ask about how much context we need to provide here)
Structure
The data contains 336 records of participants in the study, each with 51 variables. In particular, our project focused the following variables:
Baseline SSQ (BSSQ) of 16 symptoms (quantitative, discrete) : self-reported symptom severity of participants before undergoing VR, on a scale of 1 to 10.
Active SSQ (ASSQ) of 16 symptoms (quantitative, discrete) : self-reported symptom severity of participants after undergoing VR, on a scale of 1 to 10
The age of the participants (quantitative, discrete) ; they were then sorted into age groups – re-classed as ‘factor’ (qualitative, ordinal)
This was to allow the relationship between age groups and symptoms reported to be seen
Whether the participant has had VRexperience (qualitative, nominal) ; this was reclassified from ‘character’ into ‘factor’.
R misidentified this as ‘chr’, it should be a qualitative variable
The change (\(\Delta\) ) between baseline (before) and active (after) SSQ was calculated for each participant for each symptom, and now we take the average \(\Delta\) for each symptom for each group. (quantitative, discrete)
Code
data_age_groups = select (filteredData, age_group)
groups_counted = data_age_groups %>% count (age_group)
age_pie = plot_ly (groups_counted, labels = ~ age_group, values = ~ n,
type = 'pie' )
age_pie <- age_pie %>% layout (title = 'Distribution of ages' ,
showlegend = TRUE )
age_pie
Code
library (RColorBrewer)
data_experience = select (filteredData, VRexperience)
exp_counted = data_experience %>% count (VRexperience)
vr_pie = plot_ly (exp_counted, labels = ~ VRexperience, values = ~ n,
type = 'pie' )
vr_pie <- vr_pie %>% layout (title = 'Distribution of VR experience' ,
showlegend = TRUE )
vr_pie
Limitations
A limitation faced with this data set is the fact that the symptoms and their severity, for both baseline and active SSQs are self-reported, meaning that the interpretation of the scale can vary from person to person, making the reported values subjective and less reliable.
Another limitation is that VR experience is simply classified by a ‘yes’ or ‘no’, which is not very descriptive of how much exposure to VR the participant has had in the past, or how recent that experience was. However, it was mentioned that participants who had used VR more than 10 times were excluded from the study.
Assumptions
We assumed that the participants were truthful and correct about whether or not they have participated in VR before. We also assumed that the participants followed the instructions in the symptoms survey correctly and reported their symptoms on a scale of 1 to 10, with 1 being the least severe and 10 being the most.
Research Question
What is the effect of having VR experience on the symptoms experienced by people?
Code
library (plotly)
yes_means = c (yes_avg_d_discomfort,
yes_avg_d_fatigue,
yes_avg_d_headache,
yes_avg_d_eyestrain,
yes_avg_d_sweating,
yes_avg_d_nausea,
yes_avg_d_dizziness_c,
yes_avg_d_dizziness_o,
yes_avg_d_difficulty_focusing,
yes_avg_d_difficulty_concentrating,
yes_avg_d_salivation,
no_avg_d_vision
)
no_means = c (no_avg_d_discomfort,
no_avg_d_fatigue,
no_avg_d_headache,
no_avg_d_eyestrain,
no_avg_d_sweating,
no_avg_d_nausea,
no_avg_d_dizziness_c,
no_avg_d_dizziness_o,
no_avg_d_difficulty_focusing,
no_avg_d_difficulty_concentrating,
no_avg_d_salivation,
no_avg_d_vision
)
fig <- plot_ly (
type = 'scatterpolar' ,
fill = 'toself'
)
fig <- fig %>%
add_trace (
r = yes_means,
theta = c ('Discomfort' , 'Fatigue' , 'Headache' , 'Eyestrain' , 'Sweating' , 'Nausea' , 'Dizziness (closed)' , 'Dizziness (open)' , 'Difficulty Focusing' , 'Difficulty Concentrating' , 'Salivation' , 'Blurry Vision' ),
name = 'With VR Experience'
)
fig <- fig %>%
add_trace (
r = no_means,
theta = c ('Discomfort' , 'Fatigue' , 'Headache' , 'Eyestrain' , 'Sweating' , 'Nausea' ,'Dizziness (closed)' , 'Dizziness (open)' , 'Difficulty Focusing' , 'Difficulty Concentrating' , 'Salivation' , 'Blurry Vision' ),
name = 'No VR Experience'
)
fig <- fig %>%
layout (
polar = list (
radialaxis = list (
visible = T,
range = c (- 0.5 ,2 )
)
),
showlegend = T
)
fig
We filtered data by weather the participant had VR experience or not, and for each symptom, we took the average change between the BSSQ and ASSQ . The spider chart above visualizes this. From initial observations, we can see that both groups experience similar \(\Delta\) (change) in most symptoms. Interestingly, those with VR experience seem to report a greater increase in most symptoms when compared to those without VR experience.
Code
library (plotly)
library (tidyselect)
mean_sqq_ages = c (mean (grp1$ ssq_full),
mean (grp2$ ssq_full),
mean (grp3$ ssq_full),
mean (grp4$ ssq_full),
mean (grp5$ ssq_full)
)
fig2 <- plot_ly (
type = 'scatterpolar' ,
fill = 'toself' ,
r = mean_sqq_ages,
theta = c ("16 to 21" ,
"22 to 29" ,
"30 to 37" ,
"38 to 45" ,
"above 45" )
)
fig2 <- fig2 %>%
layout (
polar = list (
radialaxis = list (
visible = T,
range = c (0 ,20 )
)
),
showlegend = T
)
fig2
No scatterpolar mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Code
fig3 = ggplot (data_by_age_group, aes (x = age_group, y = mean_ssq)) +
geom_bar (stat = 'identity' )
ggplotly (fig3)
Code
library (tidyverse)
p = ggplot (filteredData, aes (x = age, y = ssq_full, colour = VRexperience)) +
geom_point ()
p
Code
plt = ggplot (filteredData, aes (x = VRexperience, y = ssq_full)) +
geom_boxplot ()
ggplotly (plt)
Code
plt2 = ggplot (filteredData, aes (x = age_group, y = ssq_full)) +
geom_boxplot ()
ggplotly (plt2)
Code
library (tidyverse)
yex = filter (filteredData, VRexperience == "Yes" )
nox = filter (filteredData, VRexperience == "No" )
q1 = quantile (yex$ age, probs = c (0.25 ), na.rm = TRUE ) |> as.numeric ()
q2 = quantile (nox$ age, probs = c (0.25 ), na.rm = TRUE ) |> as.numeric ()
# https://forum.posit.co/t/make-the-r-function-quantile-return-a-numeric-value/178084
q3 = 2 * q2- q1
ggplot (filteredData, aes (x = VRexperience, y = age)) + geom_boxplot () + geom_hline (yintercept = q1) + geom_hline (yintercept = q2) + geom_hline (yintercept = q3)
Code
filteredData = mutate (filteredData, age_class = case_when (age > q3 ~ "older" ,
age > q2 ~ "old" ,
age > q1 ~ "young" ,
age > 0 ~ "younger" ))
ggplot (filter (filteredData, age_class == "old" | age_class == "young" ), aes (x = age_class, y = ssq_full)) + geom_boxplot ()